{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copyright (c) 2020, NVIDIA CORPORATION.\n",
    "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
    "you may not use this file except in compliance with the License.\n",
    "You may obtain a copy of the License at\n",
    "    http://www.apache.org/licenses/LICENSE-2.0\n",
    "Unless required by applicable law or agreed to in writing, software\n",
    "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
    "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
    "See the License for the specific language governing permissions and\n",
    "limitations under the License."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline  \n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import gc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(146256004, 24)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hashtags</th>\n",
       "      <th>tweet_id</th>\n",
       "      <th>media</th>\n",
       "      <th>links</th>\n",
       "      <th>domains</th>\n",
       "      <th>tweet_type</th>\n",
       "      <th>language</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>a_user_id</th>\n",
       "      <th>a_follower_count</th>\n",
       "      <th>...</th>\n",
       "      <th>b_follower_count</th>\n",
       "      <th>b_following_count</th>\n",
       "      <th>b_is_verified</th>\n",
       "      <th>b_account_creation</th>\n",
       "      <th>b_follows_a</th>\n",
       "      <th>reply</th>\n",
       "      <th>retweet</th>\n",
       "      <th>retweet_comment</th>\n",
       "      <th>like</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1581262691</td>\n",
       "      <td>0</td>\n",
       "      <td>986</td>\n",
       "      <td>...</td>\n",
       "      <td>94</td>\n",
       "      <td>648</td>\n",
       "      <td>False</td>\n",
       "      <td>1478011810</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>1581497241</td>\n",
       "      <td>1</td>\n",
       "      <td>1225</td>\n",
       "      <td>...</td>\n",
       "      <td>1139</td>\n",
       "      <td>46</td>\n",
       "      <td>False</td>\n",
       "      <td>1540395738</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>1581497559</td>\n",
       "      <td>0</td>\n",
       "      <td>1581497622</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>DDFFB4C01DB85921C3580F614575AA6D</td>\n",
       "      <td>BE4539C53C53FFABCFD232DB100C792B</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1580978528</td>\n",
       "      <td>2</td>\n",
       "      <td>3016</td>\n",
       "      <td>...</td>\n",
       "      <td>780</td>\n",
       "      <td>440</td>\n",
       "      <td>False</td>\n",
       "      <td>1432084055</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581060554</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>54</td>\n",
       "      <td>1581321849</td>\n",
       "      <td>3</td>\n",
       "      <td>2121</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "      <td>False</td>\n",
       "      <td>1534313747</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581328518</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1580956787</td>\n",
       "      <td>4</td>\n",
       "      <td>813505</td>\n",
       "      <td>...</td>\n",
       "      <td>171</td>\n",
       "      <td>388</td>\n",
       "      <td>False</td>\n",
       "      <td>1490166885</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1580957807</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            hashtags  tweet_id  media  \\\n",
       "0                                                NaN         0      0   \n",
       "1  83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...         1      0   \n",
       "2                                                NaN         2      0   \n",
       "3                                                NaN         3      0   \n",
       "4                                                NaN         4      5   \n",
       "\n",
       "                              links                           domains  \\\n",
       "0                               NaN                               NaN   \n",
       "1                               NaN                               NaN   \n",
       "2  DDFFB4C01DB85921C3580F614575AA6D  BE4539C53C53FFABCFD232DB100C792B   \n",
       "3                               NaN                               NaN   \n",
       "4                               NaN                               NaN   \n",
       "\n",
       "   tweet_type  language   timestamp  a_user_id  a_follower_count  ...  \\\n",
       "0           2        11  1581262691          0               986  ...   \n",
       "1           1        11  1581497241          1              1225  ...   \n",
       "2           2        11  1580978528          2              3016  ...   \n",
       "3           1        54  1581321849          3              2121  ...   \n",
       "4           2        11  1580956787          4            813505  ...   \n",
       "\n",
       "   b_follower_count  b_following_count  b_is_verified  b_account_creation  \\\n",
       "0                94                648          False          1478011810   \n",
       "1              1139                 46          False          1540395738   \n",
       "2               780                440          False          1432084055   \n",
       "3                 1                 45          False          1534313747   \n",
       "4               171                388          False          1490166885   \n",
       "\n",
       "   b_follows_a  reply     retweet  retweet_comment        like  id  \n",
       "0        False      0           0                0           0   0  \n",
       "1         True      0  1581497559                0  1581497622   1  \n",
       "2         True      0           0                0  1581060554   2  \n",
       "3        False      0           0                0  1581328518   3  \n",
       "4        False      0           0                0  1580957807   4  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reload the DataFrame written by the previous notebook, print its shape and preview the first rows.\n",
    "# NOTE(review): unpickling can execute arbitrary code, so 'tmp.pkl' must come from a trusted run.\n",
    "\n",
    "df = pd.read_pickle('tmp.pkl')\n",
    "print( df.shape )\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 4min 20s, sys: 18.2 s, total: 4min 38s\n",
      "Wall time: 4min 38s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# Number of tab-separated entries in each multi-valued column (0 when the value is missing).\n",
    "# The .str accessor leaves missing values as NaN, so no Python-level lambda or per-row\n",
    "# pd.isnull call is needed. This assumes the columns hold only strings or NaN, as the\n",
    "# df.head() preview shows.\n",
    "for col in ['hashtags', 'domains', 'links']:\n",
    "    df['len_' + col] = df[col].str.count('\\t').add(1).fillna(0).astype(np.int64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1min 2s, sys: 4.26 s, total: 1min 6s\n",
      "Wall time: 1min 6s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "var = df['domains'].fillna('').values.copy()\n",
    "gc.collect()\n",
    "\n",
    "# PD maps each domain token to [integer id, number of occurrences].\n",
    "# Id 0 is reserved for rows that have no domain.\n",
    "PD = {}\n",
    "# Missing rows were filled with '' above. Use that sentinel directly rather than var[0],\n",
    "# which is only correct when the first row happens to be missing.\n",
    "null = ''\n",
    "PD[null] = [0,0]\n",
    "count = 1  # next unused id\n",
    "for vs in var:\n",
    "    if vs == null:\n",
    "        PD[null][1] += 1\n",
    "        continue\n",
    "    for v in vs.split('\\t'):\n",
    "        entry = PD.get(v)\n",
    "        if entry is None:\n",
    "            PD[v] = [count,1]\n",
    "            count += 1\n",
    "        else:\n",
    "            # The list is mutable, so bumping the count in place updates PD.\n",
    "            entry[1] += 1\n",
    "\n",
    "len(PD)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('', [0, 124468107]),\n",
       " ('BE4539C53C53FFABCFD232DB100C792B', [1, 16]),\n",
       " ('3330516EAADFD093A1C7B9DA83172DEB', [2, 5583]),\n",
       " ('8E1AE5ECC4EFDC42F77501ED9AA002ED', [3, 9548]),\n",
       " ('92D397F8E0F1E77B36B8C612C2C51E23', [4, 158685]),\n",
       " ('6D323BE93766E79BE423FAC5C28BE39B', [5, 5958]),\n",
       " ('5C6DB4F5101365C0E440A43F68EDBECD', [6, 61066]),\n",
       " ('8405A43A5DB3E450495AB2CB6C7CFB25', [7, 10367]),\n",
       " ('E91CDEC8DC7ABF30592FA024616FF970', [8, 1360727]),\n",
       " ('3896E26D12C903F0A00B6B1BE9A9BEA3', [9, 2228314])]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first ten (token, [id, count]) entries without copying the whole dict.\n",
    "[entry for _, entry in zip(range(10), PD.items())]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 5min 31s, sys: 20.7 s, total: 5min 51s\n",
      "Wall time: 5min 51s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# For every row, list the ids of its tokens ordered from most to least frequent.\n",
    "# Rows without any token get [0], the id reserved for missing values.\n",
    "vari = []\n",
    "for vs in var:\n",
    "    if vs == null:\n",
    "        vari.append( [0] )\n",
    "        continue\n",
    "    # PD[v] is [id, count]; empty tokens (e.g. from a stray tab) are skipped.\n",
    "    pairs = [PD[v] for v in vs.split('\\t') if v != '']\n",
    "    # sorted() is stable, so equally frequent tokens keep their order in the row.\n",
    "    # Plain Python sorting avoids building two NumPy arrays per row.\n",
    "    ranked = sorted(pairs, key=lambda p: -p[1])\n",
    "    # Fall back to [0] when every token was empty, so each row has at least one id\n",
    "    # and a later v[0] lookup cannot raise IndexError.\n",
    "    vari.append( [p[0] for p in ranked] or [0] )\n",
    "del PD\n",
    "gc.collect()\n",
    "\n",
    "len(vari), vari[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Get only the most frequent domain\n",
    "# The first id of each row's list is its most frequent domain. np.fromiter fills the int32\n",
    "# array directly, without an intermediate Python list and default-integer temporary array.\n",
    "df['domains'] = np.fromiter( (v[0] for v in vari), dtype=np.int32, count=len(vari) )\n",
    "del vari, var\n",
    "gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1min 15s, sys: 5.17 s, total: 1min 20s\n",
      "Wall time: 1min 20s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "var = df['links'].fillna('').values.copy()\n",
    "gc.collect()\n",
    "\n",
    "PD = {}\n",
    "null = var[0]\n",
    "PD[null] = [0,0]\n",
    "count = 1\n",
    "for vs in var:\n",
    "    if vs != null:\n",
    "        for v in vs.split('\\t'):\n",
    "            if v not in PD:\n",
    "                PD[v] = [count,1]\n",
    "                count +=1\n",
    "            else:\n",
    "                x = PD[v]\n",
    "                x[1] += 1\n",
    "                PD[v] = x\n",
    "    else:\n",
    "        x = PD[null]\n",
    "        x[1] += 1\n",
    "        PD[null] = x\n",
    "        \n",
    "len(PD),list(PD.items())[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 5min 53s, sys: 27.9 s, total: 6min 21s\n",
      "Wall time: 6min 15s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "vari = []\n",
    "for vs in var:\n",
    "    if vs != null:\n",
    "        li=[]\n",
    "        lf=[]\n",
    "        for v in vs.split('\\t'):\n",
    "            if v!='':\n",
    "                li.append(PD[v][0])\n",
    "                lf.append(-PD[v][1])\n",
    "        vari.append( list(np.array(li)[np.argsort(lf)].astype(np.int32) ) )\n",
    "    else:\n",
    "        vari.append( [0] )\n",
    "del PD\n",
    "gc.collect()\n",
    "\n",
    "len(vari), vari[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Get only the most frequent link\n",
    "# The first id of each row's list is its most frequent link. np.fromiter fills the int32\n",
    "# array directly, without an intermediate Python list and default-integer temporary array.\n",
    "df['links'] = np.fromiter( (v[0] for v in vari), dtype=np.int32, count=len(vari) )\n",
    "del vari, var\n",
    "gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1min 37s, sys: 5.01 s, total: 1min 42s\n",
      "Wall time: 1min 42s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "var = df['hashtags'].fillna('').values.copy()\n",
    "gc.collect()\n",
    "\n",
    "PD = {}\n",
    "null = var[0]\n",
    "PD[null] = [0,0]\n",
    "count = 1\n",
    "for vs in var:\n",
    "    if vs != null:\n",
    "        for v in vs.split('\\t'):\n",
    "            if v not in PD:\n",
    "                PD[v] = [count,1]\n",
    "                count +=1\n",
    "            else:\n",
    "                x = PD[v]\n",
    "                x[1] += 1\n",
    "                PD[v] = x\n",
    "    else:\n",
    "        x = PD[null]\n",
    "        x[1] += 1\n",
    "        PD[null] = x\n",
    "        \n",
    "len(PD),list(PD.items())[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 8min 37s, sys: 33.5 s, total: 9min 11s\n",
      "Wall time: 9min 4s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "vari = []\n",
    "for vs in var:\n",
    "    if vs != null:\n",
    "        li=[]\n",
    "        lf=[]\n",
    "        for v in vs.split('\\t'):\n",
    "            if v!='':\n",
    "                li.append(PD[v][0])\n",
    "                lf.append(-PD[v][1])\n",
    "        vari.append( list(np.array(li)[np.argsort(lf)].astype(np.int32) ) )\n",
    "    else:\n",
    "        vari.append( [0] )\n",
    "del PD\n",
    "gc.collect()\n",
    "\n",
    "len(vari), vari[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Get only the most frequent hashtag\n",
    "# The first id of each row's list is its most frequent hashtag. np.fromiter fills the int32\n",
    "# array directly, without an intermediate Python list and default-integer temporary array.\n",
    "df['hashtags'] = np.fromiter( (v[0] for v in vari), dtype=np.int32, count=len(vari) )\n",
    "del vari, var\n",
    "gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hashtags</th>\n",
       "      <th>tweet_id</th>\n",
       "      <th>media</th>\n",
       "      <th>links</th>\n",
       "      <th>domains</th>\n",
       "      <th>tweet_type</th>\n",
       "      <th>language</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>a_user_id</th>\n",
       "      <th>a_follower_count</th>\n",
       "      <th>...</th>\n",
       "      <th>b_account_creation</th>\n",
       "      <th>b_follows_a</th>\n",
       "      <th>reply</th>\n",
       "      <th>retweet</th>\n",
       "      <th>retweet_comment</th>\n",
       "      <th>like</th>\n",
       "      <th>id</th>\n",
       "      <th>len_hashtags</th>\n",
       "      <th>len_domains</th>\n",
       "      <th>len_links</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12434798</th>\n",
       "      <td>238962</td>\n",
       "      <td>60783913</td>\n",
       "      <td>0</td>\n",
       "      <td>4286254</td>\n",
       "      <td>20</td>\n",
       "      <td>2</td>\n",
       "      <td>54</td>\n",
       "      <td>1581745564</td>\n",
       "      <td>30481</td>\n",
       "      <td>3184707</td>\n",
       "      <td>...</td>\n",
       "      <td>1322160066</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255964</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434799</th>\n",
       "      <td>0</td>\n",
       "      <td>58383254</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>54</td>\n",
       "      <td>1582055658</td>\n",
       "      <td>14302</td>\n",
       "      <td>1474815</td>\n",
       "      <td>...</td>\n",
       "      <td>1322160066</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255965</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434800</th>\n",
       "      <td>0</td>\n",
       "      <td>73443113</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1581956484</td>\n",
       "      <td>221313</td>\n",
       "      <td>2301</td>\n",
       "      <td>...</td>\n",
       "      <td>1520545274</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255966</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434801</th>\n",
       "      <td>0</td>\n",
       "      <td>73443114</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1581611956</td>\n",
       "      <td>4597687</td>\n",
       "      <td>156</td>\n",
       "      <td>...</td>\n",
       "      <td>1520545274</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255967</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434802</th>\n",
       "      <td>1089</td>\n",
       "      <td>73443115</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>54</td>\n",
       "      <td>1581948544</td>\n",
       "      <td>4704326</td>\n",
       "      <td>2440</td>\n",
       "      <td>...</td>\n",
       "      <td>1339719849</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255968</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434803</th>\n",
       "      <td>0</td>\n",
       "      <td>73443116</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>54</td>\n",
       "      <td>1581694686</td>\n",
       "      <td>13774338</td>\n",
       "      <td>61</td>\n",
       "      <td>...</td>\n",
       "      <td>1339719849</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255969</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434804</th>\n",
       "      <td>0</td>\n",
       "      <td>73443117</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>54</td>\n",
       "      <td>1581819568</td>\n",
       "      <td>3266917</td>\n",
       "      <td>757</td>\n",
       "      <td>...</td>\n",
       "      <td>1331963547</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255970</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434805</th>\n",
       "      <td>0</td>\n",
       "      <td>57776596</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>54</td>\n",
       "      <td>1581959147</td>\n",
       "      <td>21953</td>\n",
       "      <td>5450254</td>\n",
       "      <td>...</td>\n",
       "      <td>1566249731</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255971</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434806</th>\n",
       "      <td>0</td>\n",
       "      <td>58579802</td>\n",
       "      <td>0</td>\n",
       "      <td>4047785</td>\n",
       "      <td>10</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1581591894</td>\n",
       "      <td>38873</td>\n",
       "      <td>309411</td>\n",
       "      <td>...</td>\n",
       "      <td>1578295645</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255972</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434807</th>\n",
       "      <td>29139</td>\n",
       "      <td>58167668</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1582095604</td>\n",
       "      <td>90513</td>\n",
       "      <td>130826</td>\n",
       "      <td>...</td>\n",
       "      <td>1399475256</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255973</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434808</th>\n",
       "      <td>0</td>\n",
       "      <td>73443118</td>\n",
       "      <td>0</td>\n",
       "      <td>5339562</td>\n",
       "      <td>12747</td>\n",
       "      <td>2</td>\n",
       "      <td>54</td>\n",
       "      <td>1581569991</td>\n",
       "      <td>1171945</td>\n",
       "      <td>5220</td>\n",
       "      <td>...</td>\n",
       "      <td>1386615056</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255974</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434809</th>\n",
       "      <td>0</td>\n",
       "      <td>73443119</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>54</td>\n",
       "      <td>1582013217</td>\n",
       "      <td>1258236</td>\n",
       "      <td>263</td>\n",
       "      <td>...</td>\n",
       "      <td>1542328769</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255975</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434810</th>\n",
       "      <td>0</td>\n",
       "      <td>57797557</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>54</td>\n",
       "      <td>1581966742</td>\n",
       "      <td>3591</td>\n",
       "      <td>322449</td>\n",
       "      <td>...</td>\n",
       "      <td>1420506030</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255976</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434811</th>\n",
       "      <td>21139</td>\n",
       "      <td>58096451</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>54</td>\n",
       "      <td>1581962995</td>\n",
       "      <td>45182</td>\n",
       "      <td>206233</td>\n",
       "      <td>...</td>\n",
       "      <td>1520006365</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255977</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434812</th>\n",
       "      <td>0</td>\n",
       "      <td>73443120</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1582010472</td>\n",
       "      <td>4339684</td>\n",
       "      <td>814</td>\n",
       "      <td>...</td>\n",
       "      <td>1404593435</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255978</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434813</th>\n",
       "      <td>0</td>\n",
       "      <td>73443121</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1581637768</td>\n",
       "      <td>11428550</td>\n",
       "      <td>631</td>\n",
       "      <td>...</td>\n",
       "      <td>1404593435</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255979</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434814</th>\n",
       "      <td>0</td>\n",
       "      <td>73443122</td>\n",
       "      <td>0</td>\n",
       "      <td>5339563</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>54</td>\n",
       "      <td>1581577181</td>\n",
       "      <td>507456</td>\n",
       "      <td>68328</td>\n",
       "      <td>...</td>\n",
       "      <td>1503162103</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255980</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434815</th>\n",
       "      <td>0</td>\n",
       "      <td>61407737</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>54</td>\n",
       "      <td>1581628573</td>\n",
       "      <td>22724</td>\n",
       "      <td>198527</td>\n",
       "      <td>...</td>\n",
       "      <td>1443317092</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255981</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434816</th>\n",
       "      <td>0</td>\n",
       "      <td>73443123</td>\n",
       "      <td>0</td>\n",
       "      <td>5339564</td>\n",
       "      <td>60843</td>\n",
       "      <td>2</td>\n",
       "      <td>59</td>\n",
       "      <td>1581628357</td>\n",
       "      <td>1285951</td>\n",
       "      <td>226</td>\n",
       "      <td>...</td>\n",
       "      <td>1549100809</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255982</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434817</th>\n",
       "      <td>0</td>\n",
       "      <td>58174299</td>\n",
       "      <td>5</td>\n",
       "      <td>3998807</td>\n",
       "      <td>66</td>\n",
       "      <td>2</td>\n",
       "      <td>54</td>\n",
       "      <td>1581656423</td>\n",
       "      <td>440262</td>\n",
       "      <td>270942</td>\n",
       "      <td>...</td>\n",
       "      <td>1373996313</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255983</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434818</th>\n",
       "      <td>0</td>\n",
       "      <td>65545011</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>59</td>\n",
       "      <td>1581880725</td>\n",
       "      <td>33016</td>\n",
       "      <td>55824</td>\n",
       "      <td>...</td>\n",
       "      <td>1373996313</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255984</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434819</th>\n",
       "      <td>0</td>\n",
       "      <td>73443124</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>63</td>\n",
       "      <td>1582133552</td>\n",
       "      <td>6504739</td>\n",
       "      <td>2615</td>\n",
       "      <td>...</td>\n",
       "      <td>1390674563</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255985</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434820</th>\n",
       "      <td>0</td>\n",
       "      <td>73443125</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1581801662</td>\n",
       "      <td>3726048</td>\n",
       "      <td>2283</td>\n",
       "      <td>...</td>\n",
       "      <td>1301557578</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255986</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434821</th>\n",
       "      <td>0</td>\n",
       "      <td>67159851</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1581846664</td>\n",
       "      <td>20107</td>\n",
       "      <td>38665</td>\n",
       "      <td>...</td>\n",
       "      <td>1316243576</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255987</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434822</th>\n",
       "      <td>481487</td>\n",
       "      <td>73443126</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>54</td>\n",
       "      <td>1581993928</td>\n",
       "      <td>13065749</td>\n",
       "      <td>285</td>\n",
       "      <td>...</td>\n",
       "      <td>1330046844</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255988</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434823</th>\n",
       "      <td>2911062</td>\n",
       "      <td>73443127</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>54</td>\n",
       "      <td>1582060967</td>\n",
       "      <td>3583222</td>\n",
       "      <td>462</td>\n",
       "      <td>...</td>\n",
       "      <td>1486590789</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255989</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434824</th>\n",
       "      <td>0</td>\n",
       "      <td>73443128</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>54</td>\n",
       "      <td>1582021793</td>\n",
       "      <td>11177590</td>\n",
       "      <td>784</td>\n",
       "      <td>...</td>\n",
       "      <td>1486590789</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255990</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434825</th>\n",
       "      <td>3607</td>\n",
       "      <td>70581768</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>54</td>\n",
       "      <td>1582151584</td>\n",
       "      <td>24068</td>\n",
       "      <td>832866</td>\n",
       "      <td>...</td>\n",
       "      <td>1466743360</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255991</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434826</th>\n",
       "      <td>0</td>\n",
       "      <td>73443129</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1582123312</td>\n",
       "      <td>3001848</td>\n",
       "      <td>375</td>\n",
       "      <td>...</td>\n",
       "      <td>1495783496</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255992</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434827</th>\n",
       "      <td>0</td>\n",
       "      <td>73443130</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>1582056004</td>\n",
       "      <td>5769684</td>\n",
       "      <td>672</td>\n",
       "      <td>...</td>\n",
       "      <td>1495783496</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255993</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434828</th>\n",
       "      <td>1296011</td>\n",
       "      <td>58434533</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>1581787888</td>\n",
       "      <td>8055</td>\n",
       "      <td>195013</td>\n",
       "      <td>...</td>\n",
       "      <td>1491543663</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255994</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434829</th>\n",
       "      <td>0</td>\n",
       "      <td>73443131</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>1581753911</td>\n",
       "      <td>6481908</td>\n",
       "      <td>246</td>\n",
       "      <td>...</td>\n",
       "      <td>1470923518</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255995</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434830</th>\n",
       "      <td>0</td>\n",
       "      <td>73443132</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>54</td>\n",
       "      <td>1581892415</td>\n",
       "      <td>2575636</td>\n",
       "      <td>1434</td>\n",
       "      <td>...</td>\n",
       "      <td>1410580297</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255996</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434831</th>\n",
       "      <td>0</td>\n",
       "      <td>73443133</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>38</td>\n",
       "      <td>1581832941</td>\n",
       "      <td>423991</td>\n",
       "      <td>606</td>\n",
       "      <td>...</td>\n",
       "      <td>1559838543</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255997</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434832</th>\n",
       "      <td>0</td>\n",
       "      <td>71370202</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>54</td>\n",
       "      <td>1582001186</td>\n",
       "      <td>3838</td>\n",
       "      <td>71562</td>\n",
       "      <td>...</td>\n",
       "      <td>1466966626</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255998</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434833</th>\n",
       "      <td>0</td>\n",
       "      <td>73443134</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>59</td>\n",
       "      <td>1582081341</td>\n",
       "      <td>1362744</td>\n",
       "      <td>5390</td>\n",
       "      <td>...</td>\n",
       "      <td>1428007228</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255999</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434834</th>\n",
       "      <td>0</td>\n",
       "      <td>73443135</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>54</td>\n",
       "      <td>1582091134</td>\n",
       "      <td>179475</td>\n",
       "      <td>17747</td>\n",
       "      <td>...</td>\n",
       "      <td>1272381192</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146256000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434835</th>\n",
       "      <td>0</td>\n",
       "      <td>73443136</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1582086464</td>\n",
       "      <td>366281</td>\n",
       "      <td>4386</td>\n",
       "      <td>...</td>\n",
       "      <td>1235182992</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146256001</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434836</th>\n",
       "      <td>845560</td>\n",
       "      <td>73443137</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1581665518</td>\n",
       "      <td>1030299</td>\n",
       "      <td>4236</td>\n",
       "      <td>...</td>\n",
       "      <td>1501554925</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146256002</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434837</th>\n",
       "      <td>6845</td>\n",
       "      <td>73443138</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1581799075</td>\n",
       "      <td>6937005</td>\n",
       "      <td>4354</td>\n",
       "      <td>...</td>\n",
       "      <td>1501554925</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146256003</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>40 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          hashtags  tweet_id  media    links  domains  tweet_type  language  \\\n",
       "12434798    238962  60783913      0  4286254       20           2        54   \n",
       "12434799         0  58383254      0        0        0           2        54   \n",
       "12434800         0  73443113      0        0        0           1         3   \n",
       "12434801         0  73443114      0        0        0           1         3   \n",
       "12434802      1089  73443115      0        0        0           1        54   \n",
       "12434803         0  73443116      0        0        0           1        54   \n",
       "12434804         0  73443117      0        0        0           2        54   \n",
       "12434805         0  57776596      9        0        0           2        54   \n",
       "12434806         0  58579802      0  4047785       10           2        11   \n",
       "12434807     29139  58167668      7        0        0           2        11   \n",
       "12434808         0  73443118      0  5339562    12747           2        54   \n",
       "12434809         0  73443119      5        0        0           1        54   \n",
       "12434810         0  57797557      0        0        0           0        54   \n",
       "12434811     21139  58096451      0        0        0           2        54   \n",
       "12434812         0  73443120      0        0        0           1         3   \n",
       "12434813         0  73443121      0        0        0           1         3   \n",
       "12434814         0  73443122      0  5339563        4           2        54   \n",
       "12434815         0  61407737      5        0        0           2        54   \n",
       "12434816         0  73443123      0  5339564    60843           2        59   \n",
       "12434817         0  58174299      5  3998807       66           2        54   \n",
       "12434818         0  65545011      0        0        0           2        59   \n",
       "12434819         0  73443124      5        0        0           0        63   \n",
       "12434820         0  73443125      0        0        0           1         4   \n",
       "12434821         0  67159851      0        0        0           1         0   \n",
       "12434822    481487  73443126      5        0        0           1        54   \n",
       "12434823   2911062  73443127      9        0        0           2        54   \n",
       "12434824         0  73443128      0        0        0           1        54   \n",
       "12434825      3607  70581768      0        0        0           1        54   \n",
       "12434826         0  73443129      5        0        0           2        11   \n",
       "12434827         0  73443130      9        0        0           1        11   \n",
       "12434828   1296011  58434533      5        0        0           1        11   \n",
       "12434829         0  73443131      9        0        0           1        11   \n",
       "12434830         0  73443132      0        0        0           1        54   \n",
       "12434831         0  73443133      0        0        0           0        38   \n",
       "12434832         0  71370202      0        0        0           0        54   \n",
       "12434833         0  73443134      0        0        0           1        59   \n",
       "12434834         0  73443135      0        0        0           1        54   \n",
       "12434835         0  73443136      9        0        0           1         4   \n",
       "12434836    845560  73443137      0        0        0           2        11   \n",
       "12434837      6845  73443138      7        0        0           2        11   \n",
       "\n",
       "           timestamp  a_user_id  a_follower_count  ...  b_account_creation  \\\n",
       "12434798  1581745564      30481           3184707  ...          1322160066   \n",
       "12434799  1582055658      14302           1474815  ...          1322160066   \n",
       "12434800  1581956484     221313              2301  ...          1520545274   \n",
       "12434801  1581611956    4597687               156  ...          1520545274   \n",
       "12434802  1581948544    4704326              2440  ...          1339719849   \n",
       "12434803  1581694686   13774338                61  ...          1339719849   \n",
       "12434804  1581819568    3266917               757  ...          1331963547   \n",
       "12434805  1581959147      21953           5450254  ...          1566249731   \n",
       "12434806  1581591894      38873            309411  ...          1578295645   \n",
       "12434807  1582095604      90513            130826  ...          1399475256   \n",
       "12434808  1581569991    1171945              5220  ...          1386615056   \n",
       "12434809  1582013217    1258236               263  ...          1542328769   \n",
       "12434810  1581966742       3591            322449  ...          1420506030   \n",
       "12434811  1581962995      45182            206233  ...          1520006365   \n",
       "12434812  1582010472    4339684               814  ...          1404593435   \n",
       "12434813  1581637768   11428550               631  ...          1404593435   \n",
       "12434814  1581577181     507456             68328  ...          1503162103   \n",
       "12434815  1581628573      22724            198527  ...          1443317092   \n",
       "12434816  1581628357    1285951               226  ...          1549100809   \n",
       "12434817  1581656423     440262            270942  ...          1373996313   \n",
       "12434818  1581880725      33016             55824  ...          1373996313   \n",
       "12434819  1582133552    6504739              2615  ...          1390674563   \n",
       "12434820  1581801662    3726048              2283  ...          1301557578   \n",
       "12434821  1581846664      20107             38665  ...          1316243576   \n",
       "12434822  1581993928   13065749               285  ...          1330046844   \n",
       "12434823  1582060967    3583222               462  ...          1486590789   \n",
       "12434824  1582021793   11177590               784  ...          1486590789   \n",
       "12434825  1582151584      24068            832866  ...          1466743360   \n",
       "12434826  1582123312    3001848               375  ...          1495783496   \n",
       "12434827  1582056004    5769684               672  ...          1495783496   \n",
       "12434828  1581787888       8055            195013  ...          1491543663   \n",
       "12434829  1581753911    6481908               246  ...          1470923518   \n",
       "12434830  1581892415    2575636              1434  ...          1410580297   \n",
       "12434831  1581832941     423991               606  ...          1559838543   \n",
       "12434832  1582001186       3838             71562  ...          1466966626   \n",
       "12434833  1582081341    1362744              5390  ...          1428007228   \n",
       "12434834  1582091134     179475             17747  ...          1272381192   \n",
       "12434835  1582086464     366281              4386  ...          1235182992   \n",
       "12434836  1581665518    1030299              4236  ...          1501554925   \n",
       "12434837  1581799075    6937005              4354  ...          1501554925   \n",
       "\n",
       "          b_follows_a  reply  retweet  retweet_comment  like         id  \\\n",
       "12434798        False      0        0                0     0  146255964   \n",
       "12434799        False      0        0                0     0  146255965   \n",
       "12434800        False      0        0                0     0  146255966   \n",
       "12434801         True      0        0                0     0  146255967   \n",
       "12434802         True      0        0                0     0  146255968   \n",
       "12434803         True      0        0                0     0  146255969   \n",
       "12434804         True      0        0                0     0  146255970   \n",
       "12434805        False      0        0                0     0  146255971   \n",
       "12434806        False      0        0                0     0  146255972   \n",
       "12434807        False      0        0                0     0  146255973   \n",
       "12434808        False      0        0                0     0  146255974   \n",
       "12434809         True      0        0                0     0  146255975   \n",
       "12434810        False      0        0                0     0  146255976   \n",
       "12434811        False      0        0                0     0  146255977   \n",
       "12434812        False      0        0                0     0  146255978   \n",
       "12434813         True      0        0                0     0  146255979   \n",
       "12434814         True      0        0                0     0  146255980   \n",
       "12434815        False      0        0                0     0  146255981   \n",
       "12434816         True      0        0                0     0  146255982   \n",
       "12434817        False      0        0                0     0  146255983   \n",
       "12434818        False      0        0                0     0  146255984   \n",
       "12434819        False      0        0                0     0  146255985   \n",
       "12434820        False      0        0                0     0  146255986   \n",
       "12434821        False      0        0                0     0  146255987   \n",
       "12434822         True      0        0                0     0  146255988   \n",
       "12434823        False      0        0                0     0  146255989   \n",
       "12434824         True      0        0                0     0  146255990   \n",
       "12434825        False      0        0                0     0  146255991   \n",
       "12434826         True      0        0                0     0  146255992   \n",
       "12434827         True      0        0                0     0  146255993   \n",
       "12434828        False      0        0                0     0  146255994   \n",
       "12434829         True      0        0                0     0  146255995   \n",
       "12434830        False      0        0                0     0  146255996   \n",
       "12434831         True      0        0                0     0  146255997   \n",
       "12434832        False      0        0                0     0  146255998   \n",
       "12434833        False      0        0                0     0  146255999   \n",
       "12434834        False      0        0                0     0  146256000   \n",
       "12434835        False      0        0                0     0  146256001   \n",
       "12434836         True      0        0                0     0  146256002   \n",
       "12434837         True      0        0                0     0  146256003   \n",
       "\n",
       "          len_hashtags  len_domains  len_links  \n",
       "12434798             3            1          1  \n",
       "12434799             0            0          0  \n",
       "12434800             0            0          0  \n",
       "12434801             0            0          0  \n",
       "12434802             3            0          0  \n",
       "12434803             0            0          0  \n",
       "12434804             0            0          0  \n",
       "12434805             0            0          0  \n",
       "12434806             0            1          1  \n",
       "12434807             1            0          0  \n",
       "12434808             0            1          1  \n",
       "12434809             0            0          0  \n",
       "12434810             0            0          0  \n",
       "12434811             1            0          0  \n",
       "12434812             0            0          0  \n",
       "12434813             0            0          0  \n",
       "12434814             0            1          1  \n",
       "12434815             0            0          0  \n",
       "12434816             0            1          1  \n",
       "12434817             0            1          1  \n",
       "12434818             0            0          0  \n",
       "12434819             0            0          0  \n",
       "12434820             0            0          0  \n",
       "12434821             0            0          0  \n",
       "12434822             1            0          0  \n",
       "12434823             1            0          0  \n",
       "12434824             0            0          0  \n",
       "12434825             1            0          0  \n",
       "12434826             0            0          0  \n",
       "12434827             0            0          0  \n",
       "12434828             1            0          0  \n",
       "12434829             0            0          0  \n",
       "12434830             0            0          0  \n",
       "12434831             0            0          0  \n",
       "12434832             0            0          0  \n",
       "12434833             0            0          0  \n",
       "12434834             0            0          0  \n",
       "12434835             0            0          0  \n",
       "12434836             1            0          0  \n",
       "12434837            10            0          0  \n",
       "\n",
       "[40 rows x 27 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.tail(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "hashtags               int32\n",
       "tweet_id              uint32\n",
       "media                  uint8\n",
       "links                  int32\n",
       "domains                int32\n",
       "tweet_type             uint8\n",
       "language               uint8\n",
       "timestamp             uint32\n",
       "a_user_id             uint32\n",
       "a_follower_count      uint32\n",
       "a_following_count     uint32\n",
       "a_is_verified           bool\n",
       "a_account_creation    uint32\n",
       "b_user_id             uint32\n",
       "b_follower_count      uint32\n",
       "b_following_count     uint32\n",
       "b_is_verified           bool\n",
       "b_account_creation    uint32\n",
       "b_follows_a             bool\n",
       "reply                 uint32\n",
       "retweet               uint32\n",
       "retweet_comment       uint32\n",
       "like                  uint32\n",
       "id                    uint32\n",
       "len_hashtags           int64\n",
       "len_domains            int64\n",
       "len_links              int64\n",
       "dtype: object"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((121386431, 27), (12434735, 27), (12434838, 27))"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row counts of the first two splits; every remaining row goes to test1.\n",
    "# NOTE(review): presumably the sizes of the original train / first test\n",
    "# files that were combined into `df` -- confirm against the loading cells.\n",
    "N_TRAIN_ROWS = 121386431\n",
    "N_TEST0_ROWS = 12434735\n",
    "test0_end = N_TRAIN_ROWS + N_TEST0_ROWS\n",
    "\n",
    "# iloc slices silently truncate when out of range, so fail loudly instead\n",
    "# of producing short or empty splits.\n",
    "assert len(df) > test0_end, 'df has too few rows for the hard-coded split sizes'\n",
    "\n",
    "# Positional slices; .copy() makes each split independent of `df`.\n",
    "train = df.iloc[:N_TRAIN_ROWS].copy()\n",
    "test0 = df.iloc[N_TRAIN_ROWS:test0_end].copy()\n",
    "test1 = df.iloc[test0_end:].copy()\n",
    "\n",
    "train.shape, test0.shape, test1.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Write each split to its own parquet file in the working directory,\n",
    "# in the same order as before: train, test0, test1.\n",
    "for split_frame, out_name in (\n",
    "    (train, 'train-1.parquet'),\n",
    "    (test0, 'test-0.parquet'),\n",
    "    (test1, 'test-1.parquet'),\n",
    "):\n",
    "    split_frame.to_parquet( out_name )\n",
    "\n",
    "# Run a garbage-collection pass after the writes; the cell displays its return value.\n",
    "gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
